## R Script Output -------------------------------------------------------------
# Figure 2: The Effect of Chinese FREI Exposure on Two-Party Democratic Vote Shares
# Appendix Table D.1: DiD Regression Results: County-Level Presidential Vote
# Appendix Table D.2: Placebo DiD Regression Results: County-Level Presidential Vote
# Appendix Table D.3: Table D.3: DiD Regression Results: Excluding Outlier Counties


## Instructions ----------------------------------------------------------------
# Step 1: Adjust MAIN_DIR to where README.txt is located
# Step 2: Run entire script


## setup -----------------------------------------------------------------------
# clean slate
rm(list = ls())
date()

# load packages
pkg <- c("tidyverse",
         "RColorBrewer", 
         "gridExtra",
         "tools", 
         "lfe", 
         "stargazer",
         "multcomp",
         "MASS")

lapply(pkg, require, character.only = TRUE)

# set main directory
MAIN_DIR <- "~/Dropbox/Research/ISQ-frei-replication/"


## load data -------------------------------------------------------------------
load(file = paste(MAIN_DIR, "data-merge-county.RData", sep = ""))

# set data frames
df <- county.df

df.treat.1 <- df %>% 
  filter(year == 2012 | year == 2016) 

df.treat.2 <- df %>% 
  filter(year == 2012 | year == 2020) 

df.placebo <- df %>% 
  filter(year == 2008 | year == 2012) 


## set var and var name correspondence -----------------------------------------
var.df <- tibble(var = c("anticorrupt",
                         "placebo_anticorrupt_2012",
                         
                         "anticorrupt:cn_ug_sqmi_cat_2012_bi_high",
                         "anticorrupt:cn_ug_sqmi_cat_2012_tri_medium",
                         "anticorrupt:cn_ug_sqmi_cat_2012_tri_high",
                         "anticorrupt:ln_cn_ug_sqmi_2012",
                         
                         "anticorrupt:cn_g_sqmi_cat_2012_biCN-G-high",
                         "anticorrupt:cn_g_sqmi_cat_2012_triCN-G-medium",
                         "anticorrupt:cn_g_sqmi_cat_2012_triCN-G-high",
                         
                         "anticorrupt:ind_ug_sqmi_cat_2012_biIND-UG-high",
                         "anticorrupt:ind_ug_sqmi_cat_2012_triIND-UG-medium",
                         "anticorrupt:ind_ug_sqmi_cat_2012_triIND-UG-high",
                         
                         "placebo_anticorrupt_2012:cn_ug_sqmi_cat_2012_bi_high",
                         "placebo_anticorrupt_2012:cn_ug_sqmi_cat_2012_tri_medium",
                         "placebo_anticorrupt_2012:cn_ug_sqmi_cat_2012_tri_high",
                         
                         "placebo_anticorrupt_2012:cn_g_sqmi_cat_2012_biCN-G-high",
                         "placebo_anticorrupt_2012:cn_g_sqmi_cat_2012_triCN-G-medium",
                         "placebo_anticorrupt_2012:cn_g_sqmi_cat_2012_triCN-G-high",
                         
                         "placebo_anticorrupt_2012:ind_ug_sqmi_cat_2012_biIND-UG-high",
                         "placebo_anticorrupt_2012:ind_ug_sqmi_cat_2012_triIND-UG-medium",
                         "placebo_anticorrupt_2012:ind_ug_sqmi_cat_2012_triIND-UG-high",
                         
                         # controls
                         "share_pop_county_female",
                         "share_pop_5_17_county",
                         "share_pop_18_24_county",
                         "share_pop_25_34_county",
                         "share_pop_35_44_county",
                         "share_pop_45_54_county",
                         "share_pop_55_64_county",
                         "share_pop_65_county",
                         
                         "share_pop_white_county",
                         "share_pop_black_county",
                         "share_pop_hispanic_county",
                         "share_pop_asian_county",
                         "share_pop_cn_county",
                         
                         "share_foreign_born_county",
                         "share_edu_ba_above_county",
                         "share_enroll_county",
                         "ln_pop_density_county",
                         "effective_tax_county", 
                         "ipw_county",
                         "employ_rate_county", 
                         "median_household_income_county",
                         "vacancy_county",
                         
                         # trending effects of controls
                         "anticorrupt:share_pop_county_female",
                         "anticorrupt:share_pop_5_17_county",
                         "anticorrupt:share_pop_18_24_county",
                         "anticorrupt:share_pop_25_34_county",
                         "anticorrupt:share_pop_35_44_county",
                         "anticorrupt:share_pop_45_54_county",
                         "anticorrupt:share_pop_55_64_county",
                         "anticorrupt:share_pop_65_county",
                         
                         "anticorrupt:share_pop_white_county",
                         "anticorrupt:share_pop_black_county",
                         "anticorrupt:share_pop_hispanic_county",
                         "anticorrupt:share_pop_asian_county",
                         "anticorrupt:share_pop_cn_county",
                         
                         "anticorrupt:share_foreign_born_county",
                         "anticorrupt:share_edu_ba_above_county",
                         "anticorrupt:share_enroll_county",
                         "anticorrupt:ln_pop_density_county",
                         "anticorrupt:effective_tax_county", 
                         "anticorrupt:ipw_county",
                         "anticorrupt:employ_rate_county", 
                         "anticorrupt:median_household_income_county",
                         "anticorrupt:vacancy_county",
                         
                         # trending effects of controls in placebo period
                         "placebo_anticorrupt_2012:share_pop_county_female",
                         "placebo_anticorrupt_2012:share_pop_5_17_county",
                         "placebo_anticorrupt_2012:share_pop_18_24_county",
                         "placebo_anticorrupt_2012:share_pop_25_34_county",
                         "placebo_anticorrupt_2012:share_pop_35_44_county",
                         "placebo_anticorrupt_2012:share_pop_45_54_county",
                         "placebo_anticorrupt_2012:share_pop_55_64_county",
                         "placebo_anticorrupt_2012:share_pop_65_county",
                         
                         "placebo_anticorrupt_2012:share_pop_white_county",
                         "placebo_anticorrupt_2012:share_pop_black_county",
                         "placebo_anticorrupt_2012:share_pop_hispanic_county",
                         "placebo_anticorrupt_2012:share_pop_asian_county",
                         "placebo_anticorrupt_2012:share_pop_cn_county",
                         
                         "placebo_anticorrupt_2012:share_foreign_born_county",
                         "placebo_anticorrupt_2012:share_edu_ba_above_county",
                         "placebo_anticorrupt_2012:share_enroll_county",
                         "placebo_anticorrupt_2012:ln_pop_density_county",
                         "placebo_anticorrupt_2012:effective_tax_county", 
                         "placebo_anticorrupt_2012:ipw_county",
                         "placebo_anticorrupt_2012:employ_rate_county", 
                         "placebo_anticorrupt_2012:median_household_income_county",
                         "placebo_anticorrupt_2012:vacancy_county"),
                 var_name = c("2013 Anti-corruption Campaign (Dummy)",
                              "Placebo Anti-corruption (2012)",
                              
                              "2013 Anti-corruption X CN UG in 2012 (Dummy, High)",
                              "2013 Anti-corruption X CN UG in 2012 (Tri., Medium)",
                              "2013 Anti-corruption X CN UG in 2012 (Tri., High)",
                              "2013 Anti-corruption X CN UG in 2012 (Continuous)",
                              
                              "2013 Anti-corruption X CN G in 2012 (Dummy, High)",
                              "2013 Anti-corruption X CN G in 2012 (Tri., Medium)",
                              "2013 Anti-corruption X CN G in 2012 (Tri., High)",
                              
                              "2013 Anti-corruption X IND UG in 2012 (Dummy, High)",
                              "2013 Anti-corruption X IND UG in 2012 (Tri., Medium)",
                              "2013 Anti-corruption X IND UG in 2012 (Tri., High)",
                              
                              "Placebo Anti-corruption (2012) X CN UG in 2012 (Dummy, High)",
                              "Placebo Anti-corruption (2012) X CN UG in 2012 (Tri., Medium)",
                              "Placebo Anti-corruption (2012) X CN UG in 2012 (Tri., High)",
                              
                              "Placebo Anti-corruption (2012) X CN G in 2012 (Dummy, High)",
                              "Placebo Anti-corruption (2012) X CN G in 2012 (Tri., Medium)",
                              "Placebo Anti-corruption (2012) X CN G in 2012 (Tri., High)",
                              
                              "Placebo Anti-corruption (2012) X IND UG in 2012 (Dummy, High)",
                              "Placebo Anti-corruption (2012) X IND UG in 2012 (Tri., Medium)",
                              "Placebo Anti-corruption (2012) X IND UG in 2012 (Tri., High)",
                              
                              # controls
                              "Share of Female Pop.",
                              "Share of Pop. 5-17",
                              "Share of Pop. 18-24",
                              "Share of Pop. 25-34",
                              "Share of Pop. 35-44",
                              "Share of Pop. 45-54",
                              "Share of Pop. 55-64",
                              "Share of Pop. 65-",
                              
                              "Share of White Pop.",
                              "Share of Black Pop.",
                              "Share of Hispanic Pop.",
                              "Share of Asian Pop.",
                              "Share of Chinese Pop.",
                              
                              "Share of Foreign-Born Pop.",
                              "Share of Pop. with BA Degree or Above",
                              "Share of Pop. Enrolled in College or Above",
                              "Population Density (log)",
                              "Effective Tax Rate (\\%)",
                              "Trade Exposure (IPW)",
                              "Employment Rate",
                              "Median Household Income (10,000)",
                              "Share of Vacant Houses",
                              
                              # trending effects of controls
                              "2013 Anti-corruption X Share of Female Pop.",
                              "2013 Anti-corruption X Share of Pop. 5--17",
                              "2013 Anti-corruption X Share of Pop. 18--24",
                              "2013 Anti-corruption X Share of Pop. 25--34",
                              "2013 Anti-corruption X Share of Pop. 35--44",
                              "2013 Anti-corruption X Share of Pop. 45--54",
                              "2013 Anti-corruption X Share of Pop. 55--64",
                              "2013 Anti-corruption X Share of Pop. 65--",
                              
                              "2013 Anti-corruption X Share of White Pop.",
                              "2013 Anti-corruption X Share of Black Pop.",
                              "2013 Anti-corruption X Share of Hispanic Pop.",
                              "2013 Anti-corruption X Share of Asian Pop.",
                              "2013 Anti-corruption X Share of Chinese Pop.",
                              
                              "2013 Anti-corruption X Share of Foreign-Born Pop.",
                              "2013 Anti-corruption X Share of Pop. with BA Degree or Above",
                              "2013 Anti-corruption X Share of Pop. Enrolled in College or Above",
                              "2013 Anti-corruption X Population Density (log)",
                              "2013 Anti-corruption X Effective Tax Rate (\\%)",
                              "2013 Anti-corruption X Trade Exposure (IPW)",
                              "2013 Anti-corruption X Employment Rate",
                              "2013 Anti-corruption X Median Household Income (10,000)",
                              "2013 Anti-corruption X Share of Vacant Houses",
                              
                              # trending effects of controls in placebo period
                              "Placebo Anti-corruption (2012) X Share of Female Pop.",
                              "Placebo Anti-corruption (2012) X Share of Pop. 5--17",
                              "Placebo Anti-corruption (2012) X Share of Pop. 18--24",
                              "Placebo Anti-corruption (2012) X Share of Pop. 25--34",
                              "Placebo Anti-corruption (2012) X Share of Pop. 35--44",
                              "Placebo Anti-corruption (2012) X Share of Pop. 45--54",
                              "Placebo Anti-corruption (2012) X Share of Pop. 55--64",
                              "Placebo Anti-corruption (2012) X Share of Pop. 65--",
                              
                              "Placebo Anti-corruption (2012) X Share of White Pop.",
                              "Placebo Anti-corruption (2012) X Share of Black Pop.",
                              "Placebo Anti-corruption (2012) X Share of Hispanic Pop.",
                              "Placebo Anti-corruption (2012) X Share of Asian Pop.",
                              "Placebo Anti-corruption (2012) X Share of CN Pop.",
                              
                              "Placebo Anti-corruption (2012) X Share of Foreign-Born Pop.",
                              "Placebo Anti-corruption (2012) X Share of Pop. with BA Degree or Above",
                              "Placebo Anti-corruption (2012) X Share of Pop. Enrolled in College or Above",
                              "Placebo Anti-corruption (2012) X Population Density (log)",
                              "Placebo Anti-corruption (2012) X Effective Tax Rate (\\%)",
                              "Placebo Anti-corruption (2012) X Trade Exposure (IPW)",
                              "Placebo Anti-corruption (2012) X Employment Rate",
                              "Placebo Anti-corruption (2012) X Median Household Income (10,000)",
                              "Placebo Anti-corruption (2012) X Share of Vacant Houses"))


## set function to replace variable names in tables ----------------------------
replaceVarName <- function(var.vec, var.df){
  # Prepare output vector
  out.vec <- rep(NA, length(var.vec))
  matches <- match(var.vec, var.df$var)
  out.vec <- var.df[matches,]$var_name
  
  if(any(is.na(out.vec))){
    warning(paste("Variable concordence missing: ", 
                  paste(var.vec[is.na(out.vec)], collapse = ", "), 
                  sep = ""))
  } else{
    #print("All variables successfully converted")
  }
  
  return(out.vec)
}


## DiD analysis ----------------------------------------------------------------
## period: 2012 vs. 2016
# base model
did.12.16.bi.base <- felm(dem_share ~ 
                            anticorrupt + 
                            cn_ug_sqmi_cat_2012_bi_high:anticorrupt
                          
                          | county_fips | 0 | county_fips, 
                          keepX = TRUE,
                          data = df.treat.1)

summary(did.12.16.bi.base)

# extended model
did.12.16.bi.ext <- felm(dem_share ~ 
                           anticorrupt + 
                           cn_ug_sqmi_cat_2012_bi_high:anticorrupt +
                           
                           cn_g_sqmi_cat_2012_bi:anticorrupt +
                           ind_ug_sqmi_cat_2012_bi:anticorrupt
                         
                         | county_fips | 0 | county_fips, 
                         keepX = TRUE,
                         data = df.treat.1)

summary(did.12.16.bi.ext)

# full model
did.12.16.bi.full <- felm(dem_share ~ 
                            anticorrupt + 
                            cn_ug_sqmi_cat_2012_bi_high:anticorrupt +
                            
                            cn_g_sqmi_cat_2012_bi:anticorrupt +
                            ind_ug_sqmi_cat_2012_bi:anticorrupt +

                            share_pop_county_female * anticorrupt +
                          
                            share_pop_5_17_county * anticorrupt +
                            share_pop_18_24_county * anticorrupt + 
                            share_pop_25_34_county * anticorrupt +
                            share_pop_35_44_county * anticorrupt +
                            share_pop_45_54_county * anticorrupt +
                            share_pop_55_64_county * anticorrupt +
                            share_pop_65_county * anticorrupt + 
                            
                            share_pop_white_county * anticorrupt + 
                            share_pop_black_county * anticorrupt + 
                            share_pop_hispanic_county * anticorrupt + 
                            share_pop_asian_county * anticorrupt + 
                            share_pop_cn_county * anticorrupt + 
                            
                            share_foreign_born_county * anticorrupt + 
                            
                            share_edu_ba_above_county * anticorrupt +
                            share_enroll_county * anticorrupt + 
                            ln_pop_density_county * anticorrupt + 
                            
                            effective_tax_county * anticorrupt + 
                            
                            ipw_county * anticorrupt + 
                            
                            employ_rate_county * anticorrupt + 
                            median_household_income_county * anticorrupt + 
                            vacancy_county * anticorrupt 
                          | county_fips | 0 | county_fips, 
                          keepX = TRUE,
                          data = df.treat.1)

summary(did.12.16.bi.full)

# check typical county in terms of Dem. share
check.dem.share <- df.treat.1 %>% 
  filter(year == 2012) %>%
  dplyr::select(county_fips, county_name, state_name, year, dem_share) %>%
  arrange(dem_share)

median(check.dem.share$dem_share)
coef(did.12.16.bi.full)["anticorrupt:cn_ug_sqmi_cat_2012_bi_high"] / median(check.dem.share$dem_share)

# effect size in s.d.
sd(check.dem.share$dem_share)
coef(did.12.16.bi.full)["anticorrupt:cn_ug_sqmi_cat_2012_bi_high"] / sd(check.dem.share$dem_share)


# trichotomous full model
did.12.16.tri.full <- felm(dem_share ~ 
                             anticorrupt + 
                             cn_ug_sqmi_cat_2012_tri_medium:anticorrupt +
                             cn_ug_sqmi_cat_2012_tri_high:anticorrupt +
                             
                             cn_g_sqmi_cat_2012_tri:anticorrupt +
                             ind_ug_sqmi_cat_2012_tri:anticorrupt +
                             
                             share_pop_county_female * anticorrupt +
                             
                             share_pop_5_17_county * anticorrupt +
                             share_pop_18_24_county * anticorrupt + 
                             share_pop_25_34_county * anticorrupt +
                             share_pop_35_44_county * anticorrupt +
                             share_pop_45_54_county * anticorrupt +
                             share_pop_55_64_county * anticorrupt +
                             share_pop_65_county * anticorrupt + 
                             
                             share_pop_white_county * anticorrupt + 
                             share_pop_black_county * anticorrupt + 
                             share_pop_hispanic_county * anticorrupt + 
                             share_pop_asian_county * anticorrupt + 
                             share_pop_cn_county * anticorrupt + 
                             
                             share_foreign_born_county * anticorrupt + 
                             
                             share_edu_ba_above_county * anticorrupt +
                             share_enroll_county * anticorrupt + 
                             ln_pop_density_county * anticorrupt + 
                             
                             effective_tax_county * anticorrupt + 
                             
                             ipw_county * anticorrupt + 
                             
                             employ_rate_county * anticorrupt + 
                             median_household_income_county * anticorrupt + 
                             vacancy_county * anticorrupt 
                           | county_fips | 0 | county_fips, 
                           keepX = TRUE,
                           data = df.treat.1)

summary(did.12.16.tri.full)
n_distinct(did.12.16.tri.full$fe$county_fips)
confint(did.12.16.tri.full) * 100

# continuous full model
did.12.16.con.full <- felm(dem_share ~
                             anticorrupt +
                             ln_cn_ug_sqmi_2012:anticorrupt + 
                             
                             cn_g_sqmi_cat_2012_bi:anticorrupt +
                             ind_ug_sqmi_cat_2012_bi:anticorrupt +
                             
                             share_pop_county_female * anticorrupt +
                             
                             share_pop_5_17_county * anticorrupt +
                             share_pop_18_24_county * anticorrupt +
                             share_pop_25_34_county * anticorrupt +
                             share_pop_35_44_county * anticorrupt +
                             share_pop_45_54_county * anticorrupt +
                             share_pop_55_64_county * anticorrupt +
                             share_pop_65_county * anticorrupt +
                             
                             share_pop_white_county * anticorrupt +
                             share_pop_black_county * anticorrupt +
                             share_pop_hispanic_county * anticorrupt +
                             share_pop_asian_county * anticorrupt +
                             share_pop_cn_county * anticorrupt +
                             
                             share_foreign_born_county * anticorrupt +
                             
                             share_edu_ba_above_county * anticorrupt +
                             share_enroll_county * anticorrupt +
                             ln_pop_density_county * anticorrupt +
                             
                             effective_tax_county * anticorrupt +
                             
                             ipw_county * anticorrupt +
                             
                             employ_rate_county * anticorrupt +
                             median_household_income_county * anticorrupt +
                             vacancy_county * anticorrupt
                           | county_fips | 0 | county_fips,
                           keepX = TRUE,
                           data = df.treat.1)

summary(did.12.16.con.full)


## DiD analysis ----------------------------------------------------------------
## period: 2012 vs. 2020
# dichotomous full model
did.12.20.bi.full <- felm(dem_share ~ 
                            anticorrupt + 
                            cn_ug_sqmi_cat_2012_bi_high:anticorrupt +
                            
                            cn_g_sqmi_cat_2012_bi:anticorrupt +
                            ind_ug_sqmi_cat_2012_bi:anticorrupt +
                            
                            share_pop_county_female * anticorrupt +
                            
                            share_pop_5_17_county * anticorrupt +
                            share_pop_18_24_county * anticorrupt +
                            share_pop_25_34_county * anticorrupt +
                            share_pop_35_44_county * anticorrupt +
                            share_pop_45_54_county * anticorrupt +
                            share_pop_55_64_county * anticorrupt +
                            share_pop_65_county * anticorrupt + 
                            
                            share_pop_white_county * anticorrupt + 
                            share_pop_black_county * anticorrupt + 
                            share_pop_hispanic_county * anticorrupt + 
                            share_pop_asian_county * anticorrupt + 
                            share_pop_cn_county * anticorrupt + 
                            
                            share_foreign_born_county * anticorrupt + 
                            
                            share_edu_ba_above_county * anticorrupt +
                            share_enroll_county * anticorrupt + 
                            ln_pop_density_county * anticorrupt + 
                            
                            effective_tax_county * anticorrupt + 
                            
                            ipw_county * anticorrupt + 
                            
                            employ_rate_county * anticorrupt + 
                            median_household_income_county * anticorrupt +
                            vacancy_county * anticorrupt
                          | county_fips | 0 | county_fips, 
                          keepX = TRUE,
                          data = df.treat.2)

summary(did.12.20.bi.full)
n_distinct(did.12.20.bi.full$fe$county_fips)
confint(did.12.20.bi.full) * 100


# trichotomous full model
did.12.20.tri.full <- felm(dem_share ~ 
                            anticorrupt + 
                            cn_ug_sqmi_cat_2012_tri_medium:anticorrupt +
                            cn_ug_sqmi_cat_2012_tri_high:anticorrupt +
                            
                            cn_g_sqmi_cat_2012_tri:anticorrupt +
                            ind_ug_sqmi_cat_2012_tri:anticorrupt +
                            
                            share_pop_county_female * anticorrupt +
                            
                            share_pop_5_17_county * anticorrupt +
                            share_pop_18_24_county * anticorrupt + 
                            share_pop_25_34_county * anticorrupt +
                            share_pop_35_44_county * anticorrupt +
                            share_pop_45_54_county * anticorrupt +
                            share_pop_55_64_county * anticorrupt +
                            share_pop_65_county * anticorrupt + 
                            
                            share_pop_white_county * anticorrupt + 
                            share_pop_black_county * anticorrupt + 
                            share_pop_hispanic_county * anticorrupt + 
                            share_pop_asian_county * anticorrupt + 
                            share_pop_cn_county * anticorrupt + 
                            
                            share_foreign_born_county * anticorrupt + 
                            
                            share_edu_ba_above_county * anticorrupt +
                            share_enroll_county * anticorrupt + 
                            ln_pop_density_county * anticorrupt + 
                            
                            effective_tax_county * anticorrupt + 
                            
                            ipw_county * anticorrupt + 
                            
                            employ_rate_county * anticorrupt + 
                            median_household_income_county * anticorrupt + 
                            vacancy_county * anticorrupt 
                          | county_fips | 0 | county_fips, 
                          keepX = TRUE,
                          data = df.treat.2)

summary(did.12.20.tri.full)
n_distinct(did.12.20.tri.full$fe$county_fips)
confint(did.12.20.tri.full) * 100


# continuous full model
did.12.20.con.full <- felm(dem_share ~
                             anticorrupt +
                             ln_cn_ug_sqmi_2012:anticorrupt +
                             
                             cn_g_sqmi_cat_2012_bi:anticorrupt +
                             ind_ug_sqmi_cat_2012_bi:anticorrupt +
                             
                             share_pop_county_female * anticorrupt +
                             
                             share_pop_5_17_county * anticorrupt +
                             share_pop_18_24_county * anticorrupt +
                             share_pop_25_34_county * anticorrupt +
                             share_pop_35_44_county * anticorrupt +
                             share_pop_45_54_county * anticorrupt +
                             share_pop_55_64_county * anticorrupt +
                             share_pop_65_county * anticorrupt +
                             
                             share_pop_white_county * anticorrupt +
                             share_pop_black_county * anticorrupt +
                             share_pop_hispanic_county * anticorrupt +
                             share_pop_asian_county * anticorrupt +
                             share_pop_cn_county * anticorrupt +
                             
                             share_foreign_born_county * anticorrupt +
                             
                             share_edu_ba_above_county * anticorrupt +
                             share_enroll_county * anticorrupt +
                             ln_pop_density_county * anticorrupt +
                             
                             effective_tax_county * anticorrupt +
                             
                             ipw_county * anticorrupt +
                             
                             employ_rate_county * anticorrupt +
                             median_household_income_county * anticorrupt +
                             vacancy_county * anticorrupt
                           | county_fips | 0 | county_fips,
                           keepX = TRUE,
                           data = df.treat.2)

summary(did.12.20.con.full)


## Appendix Table D.1: DiD Regression Results: County-Level Presidential Vote -----
# set outputs
did.main <- list(did.12.16.bi.base,
                 did.12.16.bi.ext,
                 did.12.16.bi.full,
                 did.12.16.tri.full,
                 did.12.16.con.full,
                 did.12.20.bi.full,
                 did.12.20.tri.full,
                 did.12.20.con.full)

# extract clustered SEs
did.main.cse <- list(did.12.16.bi.base$cse,
                     did.12.16.bi.ext$cse,
                     did.12.16.bi.full$cse,
                     did.12.16.tri.full$cse,
                     did.12.16.con.full$cse,
                     did.12.20.bi.full$cse,
                     did.12.20.tri.full$cse,
                     did.12.20.con.full$cse)

# set model names
mod.names.main <- c("12-16-base", 
                    "12-16-ext", 
                    "12-16-full",
                    "12-16-full-tri",
                    "12-16-full-con",
                    "12-20-full",
                    "12-20-full-tri",
                    "12-20-full-con")

# set variable order
var.order.main <- c("^anticorrupt$",
                    "^anticorrupt:cn_ug_sqmi_cat_2012_bi_high$",
                    "^anticorrupt:cn_ug_sqmi_cat_2012_tri_medium$",
                    "^anticorrupt:cn_ug_sqmi_cat_2012_tri_high$",
                    "^anticorrupt:ln_cn_ug_sqmi_2012$",
                    
                    "^anticorrupt:cn_g_sqmi_cat_2012_biCN-G-high$",
                    "^anticorrupt:cn_g_sqmi_cat_2012_triCN-G-medium$",
                    "^anticorrupt:cn_g_sqmi_cat_2012_triCN-G-high$",
                    
                    "^anticorrupt:ind_ug_sqmi_cat_2012_biIND-UG-high$",
                    "^anticorrupt:ind_ug_sqmi_cat_2012_triIND-UG-medium$",
                    "^anticorrupt:ind_ug_sqmi_cat_2012_triIND-UG-high$",
                    
                    "^share_pop_county_female$",
                    "^share_pop_5_17_county$",
                    "^share_pop_18_24_county$",
                    "^share_pop_25_34_county$",
                    "^share_pop_35_44_county$",
                    "^share_pop_45_54_county$",
                    "^share_pop_55_64_county$",
                    "^share_pop_65_county$",
                    
                    "^share_pop_white_county$",
                    "^share_pop_black_county$",
                    "^share_pop_hispanic_county$",
                    "^share_pop_asian_county$",
                    "^share_pop_cn_county$",
                    
                    "^share_foreign_born_county$",
                    "^share_edu_ba_above_county$",
                    "^share_enroll_county$",
                    "^ln_pop_density_county$",
                    "^effective_tax_county$", 
                    "^ipw_county$",
                    "^employ_rate_county$", 
                    "^median_household_income_county$",
                    "^vacancy_county$",
                    
                    "^anticorrupt:share_pop_county_female$",
                    "^anticorrupt:share_pop_5_17_county$",
                    "^anticorrupt:share_pop_18_24_county$",
                    "^anticorrupt:share_pop_25_34_county$",
                    "^anticorrupt:share_pop_35_44_county$",
                    "^anticorrupt:share_pop_45_54_county$",
                    "^anticorrupt:share_pop_55_64_county$",
                    "^anticorrupt:share_pop_65_county$",
                    
                    "^anticorrupt:share_pop_white_county$",
                    "^anticorrupt:share_pop_black_county$",
                    "^anticorrupt:share_pop_hispanic_county$",
                    "^anticorrupt:share_pop_asian_county$",
                    "^anticorrupt:share_pop_cn_county$",
                    
                    "^anticorrupt:share_foreign_born_county$",
                    "^anticorrupt:share_edu_ba_above_county$",
                    "^anticorrupt:share_enroll_county$",
                    "^anticorrupt:ln_pop_density_county$",
                    "^anticorrupt:effective_tax_county$", 
                    "^anticorrupt:ipw_county$",
                    "^anticorrupt:employ_rate_county$", 
                    "^anticorrupt:median_household_income_county$",
                    "^anticorrupt:vacancy_county$")

# set var label
var.label.main <- str_replace_all(var.order.main, "\\^", "")
var.label.main <- str_replace_all(var.label.main, "\\$", "")

# save
sink(file.path(MAIN_DIR, "Table-D1.txt"))
#sink(file.path(MAIN_DIR, "Table-D1.tex"))
stargazer(did.main,
          #type = "latex",
          type = "text",
          title = "DiD Regression Results: County-Level Presidential Vote",
          dep.var.labels = "Two-Party Democratic Vote Share",
          dep.var.labels.include = TRUE,
          column.labels = mod.names.main,
          order = var.order.main,
          covariate.labels = replaceVarName(var.vec = var.label.main,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: County",
                             rep("Yes", 8)),
                           c("Number of Counties",
                             formatC(n_distinct(did.12.16.bi.base$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.12.16.bi.ext$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.12.16.bi.full$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.12.16.con.full$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.12.16.tri.full$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.12.20.bi.full$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.12.20.tri.full$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.12.20.con.full$fe$county_fips), big.mark = ",")
                             )),
          label = "tb:did-pres-main",
          single.row = FALSE,
          se = did.main.cse,
          # no.space = TRUE,
          df = FALSE,
          font.size = "tiny",
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          notes.align = "l",
          notes.append = FALSE)
sink()


## Placebo DID analysis --------------------------------------------------------
## period: 2008 vs. 2012
# base
did.placebo.bi.base <- felm(dem_share ~ 
                              placebo_anticorrupt_2012 + 
                              
                              cn_ug_sqmi_cat_2012_bi_high:placebo_anticorrupt_2012 
                            | county_fips | 0 | county_fips, 
                            keepX = TRUE,
                            data = df.placebo)

summary(did.placebo.bi.base)
n_distinct(did.placebo.bi.base$fe$county_fips)

# dichotomous full
did.placebo.bi.full <- felm(dem_share ~ 
                              placebo_anticorrupt_2012 + 
                              
                              cn_ug_sqmi_cat_2012_bi_high:placebo_anticorrupt_2012 +
                              
                              cn_g_sqmi_cat_2012_bi:placebo_anticorrupt_2012 +
                              ind_ug_sqmi_cat_2012_bi:placebo_anticorrupt_2012 +
                              
                              share_pop_county_female * placebo_anticorrupt_2012 +
                              
                              share_pop_5_17_county * placebo_anticorrupt_2012 +
                              share_pop_18_24_county * placebo_anticorrupt_2012 + 
                              share_pop_25_34_county * placebo_anticorrupt_2012 +
                              share_pop_35_44_county * placebo_anticorrupt_2012 +
                              share_pop_45_54_county * placebo_anticorrupt_2012 +
                              share_pop_55_64_county * placebo_anticorrupt_2012 +
                              share_pop_65_county * placebo_anticorrupt_2012 + 
                              
                              share_pop_white_county * placebo_anticorrupt_2012 + 
                              share_pop_black_county * placebo_anticorrupt_2012 + 
                              share_pop_hispanic_county * placebo_anticorrupt_2012 + 
                              share_pop_asian_county * placebo_anticorrupt_2012 + 
                              share_pop_cn_county * placebo_anticorrupt_2012 + 
                              
                              share_foreign_born_county * placebo_anticorrupt_2012 + 
                              
                              share_edu_ba_above_county * placebo_anticorrupt_2012 +
                              share_enroll_county * placebo_anticorrupt_2012 + 
                              ln_pop_density_county * placebo_anticorrupt_2012 + 
                              
                              effective_tax_county * placebo_anticorrupt_2012 + 
                              
                              ipw_county * placebo_anticorrupt_2012 + 
                              
                              employ_rate_county * placebo_anticorrupt_2012 + 
                              median_household_income_county * placebo_anticorrupt_2012 + 
                              vacancy_county * placebo_anticorrupt_2012 
                            | county_fips | 0 | county_fips, 
                            keepX = TRUE,
                            data = df.placebo)

summary(did.placebo.bi.full)
n_distinct(did.placebo.bi.full$fe$county_fips)
confint(did.placebo.bi.full) * 100

# trichotomous full
did.placebo.tri.full <- felm(dem_share ~ 
                               placebo_anticorrupt_2012 + 
                               
                               cn_ug_sqmi_cat_2012_tri_medium:placebo_anticorrupt_2012 +
                               cn_ug_sqmi_cat_2012_tri_high:placebo_anticorrupt_2012 +
                               
                               cn_g_sqmi_cat_2012_tri:placebo_anticorrupt_2012 +
                               ind_ug_sqmi_cat_2012_tri:placebo_anticorrupt_2012 +
                               
                               share_pop_county_female * placebo_anticorrupt_2012 +
                               
                               share_pop_5_17_county * placebo_anticorrupt_2012 +
                               share_pop_18_24_county * placebo_anticorrupt_2012 + 
                               share_pop_25_34_county * placebo_anticorrupt_2012 +
                               share_pop_35_44_county * placebo_anticorrupt_2012 +
                               share_pop_45_54_county * placebo_anticorrupt_2012 +
                               share_pop_55_64_county * placebo_anticorrupt_2012 +
                               share_pop_65_county * placebo_anticorrupt_2012 + 
                               
                               share_pop_white_county * placebo_anticorrupt_2012 + 
                               share_pop_black_county * placebo_anticorrupt_2012 + 
                               share_pop_hispanic_county * placebo_anticorrupt_2012 + 
                               share_pop_asian_county * placebo_anticorrupt_2012 + 
                               share_pop_cn_county * placebo_anticorrupt_2012 + 
                               
                               share_foreign_born_county * placebo_anticorrupt_2012 + 
                               
                               share_edu_ba_above_county * placebo_anticorrupt_2012 +
                               share_enroll_county * placebo_anticorrupt_2012 +
                               ln_pop_density_county * placebo_anticorrupt_2012 + 
                               
                               effective_tax_county * placebo_anticorrupt_2012 + 
                               
                               ipw_county * placebo_anticorrupt_2012 + 
                               
                               employ_rate_county * placebo_anticorrupt_2012 + 
                               median_household_income_county * placebo_anticorrupt_2012 + 
                               vacancy_county * placebo_anticorrupt_2012 
                             | county_fips | 0 | county_fips, 
                             keepX = TRUE,
                             data = df.placebo)

summary(did.placebo.tri.full)


## Appendix Table D.2: Placebo DiD Regression Results: County-Level Presidential Vote -----
# set outputs
did.placebo <- list(did.placebo.bi.base,
                    did.placebo.bi.full,
                    did.placebo.tri.full)

# set clustered SEs
did.placebo.cse <- list(did.placebo.bi.base$cse,
                        did.placebo.bi.full$cse,
                        did.placebo.tri.full$cse)

# set model names
mod.names.placebo <- c("08-12-base",
                       "08-12-full",
                       "08-12-full-tri")

# set var label
var.label.placebo <- c("placebo_anticorrupt_2012",
                       "placebo_anticorrupt_2012:cn_ug_sqmi_cat_2012_bi_high",
                       "placebo_anticorrupt_2012:cn_ug_sqmi_cat_2012_tri_medium",
                       "placebo_anticorrupt_2012:cn_ug_sqmi_cat_2012_tri_high",
                       
                       "placebo_anticorrupt_2012:cn_g_sqmi_cat_2012_biCN-G-high",
                       "placebo_anticorrupt_2012:cn_g_sqmi_cat_2012_triCN-G-medium",
                       "placebo_anticorrupt_2012:cn_g_sqmi_cat_2012_triCN-G-high",
                       
                       "placebo_anticorrupt_2012:ind_ug_sqmi_cat_2012_biIND-UG-high",
                       "placebo_anticorrupt_2012:ind_ug_sqmi_cat_2012_triIND-UG-medium",
                       "placebo_anticorrupt_2012:ind_ug_sqmi_cat_2012_triIND-UG-high",
                       
                       "share_pop_county_female",
                       "share_pop_5_17_county",
                       "share_pop_18_24_county",
                       "share_pop_25_34_county",
                       "share_pop_35_44_county",
                       "share_pop_45_54_county",
                       "share_pop_55_64_county",
                       "share_pop_65_county",
                       
                       "share_pop_white_county",
                       "share_pop_black_county",
                       "share_pop_hispanic_county",
                       "share_pop_asian_county",
                       "share_pop_cn_county",
                       
                       "share_foreign_born_county",
                       "share_edu_ba_above_county",
                       "share_enroll_county",
                       "ln_pop_density_county",
                       "effective_tax_county", 
                       "ipw_county",
                       "employ_rate_county", 
                       "median_household_income_county",
                       "vacancy_county",
                       
                       "placebo_anticorrupt_2012:share_pop_county_female",
                       "placebo_anticorrupt_2012:share_pop_5_17_county",
                       "placebo_anticorrupt_2012:share_pop_18_24_county",
                       "placebo_anticorrupt_2012:share_pop_25_34_county",
                       "placebo_anticorrupt_2012:share_pop_35_44_county",
                       "placebo_anticorrupt_2012:share_pop_45_54_county",
                       "placebo_anticorrupt_2012:share_pop_55_64_county",
                       "placebo_anticorrupt_2012:share_pop_65_county",
                       
                       "placebo_anticorrupt_2012:share_pop_white_county",
                       "placebo_anticorrupt_2012:share_pop_black_county",
                       "placebo_anticorrupt_2012:share_pop_hispanic_county",
                       "placebo_anticorrupt_2012:share_pop_asian_county",
                       "placebo_anticorrupt_2012:share_pop_cn_county",
                       
                       "placebo_anticorrupt_2012:share_foreign_born_county",
                       "placebo_anticorrupt_2012:share_edu_ba_above_county",
                       "placebo_anticorrupt_2012:share_enroll_county",
                       "placebo_anticorrupt_2012:ln_pop_density_county",
                       "placebo_anticorrupt_2012:effective_tax_county", 
                       "placebo_anticorrupt_2012:ipw_county",
                       "placebo_anticorrupt_2012:employ_rate_county", 
                       "placebo_anticorrupt_2012:median_household_income_county",
                       "placebo_anticorrupt_2012:vacancy_county")

# set variable order
var.order.placebo <- c(1,
                       24, 27, 28,
                       25, 29, 30,
                       26, 31, 32,
                       2:23,
                       33:54)

# save
#sink(file.path(MAIN_DIR, "Table-D2.tex"))
sink(file.path(MAIN_DIR, "Table-D2.txt"))
stargazer(did.placebo,
          #type = "latex",
          type = "text",
          title = "Placebo DiD Regression Results: County-Level Presidential Vote",
          dep.var.labels = "Two-Party Democratic Vote Share, 2008 vs. 2012",
          dep.var.labels.include = TRUE,
          column.labels = mod.names.placebo,
          order = var.order.placebo,
          covariate.labels = replaceVarName(var.vec = var.label.placebo,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: County",
                             rep("Yes", 3)),
                           c("Number of Counties",
                             formatC(n_distinct(did.placebo.bi.base$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.placebo.bi.full$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.placebo.tri.full$fe$county_fips), big.mark = ","))),
          label = "tb:did-pres-placebo",
          single.row = FALSE,
          se = did.placebo.cse,
          df = FALSE,
          font.size = "tiny",
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          notes.align = "l",
          notes.append = FALSE)
sink()


## Figure 2: The Effect of Chinese FREI Exposure on Two-Party Democratic Vote Shares -----
# set vars to extract
vars.select <- c("anticorrupt:cn_ug_sqmi_cat_2012_bi_high", 
                 "placebo_anticorrupt_2012:cn_ug_sqmi_cat_2012_bi_high")

# put DiD results in a list
fm.list <- list("Full (2012-2016)" = did.12.16.bi.full,
                "Full (2012-2020)" = did.12.20.bi.full,
                "Full (placebo)" = did.placebo.bi.full)
  
# extract key coefs and clustered SEs
coef.df <- map_df(fm.list, function(x){ 
  
  out <- tibble(var = rownames(x$coefficients),
                coef = coef(x),
                cse = x$cse)
  out <- out %>% 
    filter(var %in% vars.select)
}, .id = NULL) 

# add model name
model <- c("2012 vs. 2016",
           "2012 vs. 2020",
           "2008 vs. 2012")
  
# set shape
coef.shape <- c(rep("treat", 2), "placebo")

# combine
coef.df <- cbind(model, coef.df, coef.shape)

# set alpha level
alpha <- 0.05

# calculate C.I.
coef.df <- coef.df %>%
  mutate(model = factor(model, 
                        levels = c("2008 vs. 2012",
                                   "2012 vs. 2016",
                                   "2012 vs. 2020")),
         multiplier = qnorm(1 - alpha/2),
         coef = as.numeric(coef),
         cse = as.numeric(cse),
         lwr = coef - multiplier * cse,
         upr = coef + multiplier * cse) 

# set parameters
display.brewer.pal(9, "Set1")
brewer.pal(9, "Set1")
colors <- c("#E41A1C", "black")
shapes <- c(17, 16)
axis.title.size <- 16

# plot
plot.coef <- ggplot(data = coef.df,
                    aes(x = model, y = coef,
                        ymin = lwr, 
                        ymax = upr,
                        shape = factor(coef.shape)
                    )) +
  geom_pointrange(size = 0.8) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") + 
  scale_shape_manual(values = shapes) + 
  scale_y_continuous("Estimated DiD Effect on Two-Party Dem. Vote Share",
                     limits = c(-0.02, 0.01)) +
  theme_bw() +  
  theme(legend.position = "none",
        axis.title.y = element_text(size = axis.title.size - 2,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size - 1),
        axis.title.x = element_blank(),
        axis.text.x = element_text(size = axis.title.size - 1),
        plot.title = element_blank(),
        panel.grid = element_blank()) 

plot.coef

# print
pdf(file = paste(MAIN_DIR, "Figure-2.pdf", sep = "/"), 
    width = 6, height = 5)
print(plot.coef)
dev.off()

setEPS()
cairo_ps(paste(MAIN_DIR, "Figure-2.eps", sep = "/"), 
         width = 6, height = 5)
print(plot.coef)
dev.off()


## compare to simulated effects of population density: 2012-2016 ---------------
# 25th percentile to 75 percentile
df.treat.1.2012.pop.cut <- df.treat.1 %>% 
  filter(year == 2012) %>%
  pull(ln_pop_density_county) %>%
  quantile(probs = seq(0, 1, by = 1/4))

range.key <- df.treat.1.2012.pop.cut[c(2,4)]
range.key

# set parameters
sim.df <- df.treat.1
fm <- did.12.16.bi.full
change.var <- "ln_pop_density_county"
moderator <- "anticorrupt"
interaction <- "anticorrupt:ln_pop_density_county"
range.key <- range.key
n.draws <- 1000
#dv.log <- TRUE
dv.log <- FALSE

# set seed for simulation
seed <- 1234
set.seed(seed)

beta.draws <- mvrnorm(n.draws, coef(fm), fm$clustervcv)

# extract FEs
fe <- getfe(fm)
fe.matrix <- t(fe[c("effect")])
row.names(fe.matrix) <- NULL
fe.matrix <- fe.matrix[rep(1:nrow(fe.matrix),
                           each = n.draws),]

# combine with betas
beta.draws <- cbind(beta.draws, fe.matrix)

# extract model matrix
model.df <- as.data.frame(fm$X)

# combine with FEs
fe.df <- bind_rows(fm$fe)
model.df.full <- cbind(model.df, fe.df)

# create dummies for FEs
model.df.full <- model.df.full %>%
  mutate(new = 1) %>% 
  spread(county_fips, new, fill = 0)

# for each value of change var
sim.out.dd.density.sqmi <- map_dfr(1:length(range.key), function(x){
  # print
  cat(paste("Simulating predicted probabilities for ", x, " of ",
            length(range.key), " ", change.var, " values\n", sep = ""))
  
  # create temp data
  newd <- model.df.full
  
  # replace change var value x
  newd[,change.var] <- range.key[[x]]
  
  # # replace moderator value y
  # newd[,moderator] <- range.mod[[y]]
  
  # calculate new interaction
  newd[,interaction] <- range.key[[x]]*newd[,moderator]
  
  # matrix multiplication for systematic component
  sys.com <- as.matrix(newd) %*% t(beta.draws)
  
  # average over observations for each draw
  ev <- as.data.frame(sys.com) %>%
    summarise_all(mean)
  
  if(dv.log){
    # convert to original scale of outcome
    ev <- exp(ev) %>%
      mutate(setx = range.key[[x]])
    
    return(ev)
    
  } else{
    ev <- ev %>%
      mutate(setx = range.key[[x]])
    
    return(ev)
  }
})

# calculate pv for each setx and take quantities of interest
ev.dd.density <- sim.out.dd.density.sqmi %>%
  gather(draw, value, -setx) %>%
  group_by(setx) %>%
  dplyr::summarize(mean = mean(value, na.rm = TRUE),
                   lwr = quantile(value, probs = 0.025),
                   upr = quantile(value, probs = 0.975)) %>%
  ungroup()

# population density effect size
ev.dd.density
(ev.dd.density[2, "mean"] - ev.dd.density[1, "mean"]) * 100

# compare Chinese FREI and pop. density effect size
coef(did.12.16.bi.full)["anticorrupt:cn_ug_sqmi_cat_2012_bi_high"] / (ev.dd.density[2, "mean"] - ev.dd.density[1, "mean"])


## DiD analysis: Exclude Outlier Counties --------------------------------------
## period: 2012 vs. 2016

# identify outliers
outlier.vec.12.16 <- boxplot.stats(df.treat.1$china_undergraduate_county_sqmi_2012)$out
outlier.vec.12.16 <- outlier.vec.12.16[outlier.vec.12.16 > summary(df.treat.1$china_undergraduate_county_sqmi_2012)["3rd Qu."]]
outlier.vec.12.16

# subset to outliers
outlier.df.12.16 <- df.treat.1 %>%
  filter(china_undergraduate_county_sqmi_2012 %in% outlier.vec.12.16) %>%
  mutate(county_state_name = paste(county_name, state_name, sep = ", "))

# extract outliers
unique(outlier.df.12.16$county_state_name)
n_distinct(outlier.df.12.16$county_fips)

# exclude outliers
df.treat.1.exclude.outliers.12.16 <- df.treat.1 %>% 
  filter(!(county_fips %in% outlier.df.12.16$county_fips))

# dichotomous full model
did.12.16.bi.full.exclude.outliers <- felm(dem_share ~ 
                                             anticorrupt + 
                                             cn_ug_sqmi_cat_2012_bi_high:anticorrupt +
                                             
                                             cn_g_sqmi_cat_2012_bi:anticorrupt +
                                             ind_ug_sqmi_cat_2012_bi:anticorrupt +
                                             
                                             share_pop_county_female * anticorrupt +
                                             
                                             share_pop_5_17_county * anticorrupt +
                                             share_pop_18_24_county * anticorrupt + 
                                             share_pop_25_34_county * anticorrupt +
                                             share_pop_35_44_county * anticorrupt +
                                             share_pop_45_54_county * anticorrupt +
                                             share_pop_55_64_county * anticorrupt +
                                             share_pop_65_county * anticorrupt + 
                                             
                                             share_pop_white_county * anticorrupt + 
                                             share_pop_black_county * anticorrupt + 
                                             share_pop_hispanic_county * anticorrupt + 
                                             share_pop_asian_county * anticorrupt + 
                                             share_pop_cn_county * anticorrupt + 
                                             
                                             share_foreign_born_county * anticorrupt + 
                                             
                                             share_edu_ba_above_county * anticorrupt +
                                             share_enroll_county * anticorrupt + 
                                             ln_pop_density_county * anticorrupt + 
                                             
                                             effective_tax_county * anticorrupt + 
                                             
                                             ipw_county * anticorrupt + 
                                             
                                             employ_rate_county * anticorrupt + 
                                             median_household_income_county * anticorrupt + 
                                             vacancy_county * anticorrupt 
                                           | county_fips | 0 | county_fips, 
                                           keepX = TRUE,
                                           data = df.treat.1.exclude.outliers.12.16)

summary(did.12.16.bi.full.exclude.outliers)

# trichotomous full model
did.12.16.tri.full.exclude.outliers <- felm(dem_share ~ 
                                              anticorrupt + 
                                              cn_ug_sqmi_cat_2012_tri_medium:anticorrupt +
                                              cn_ug_sqmi_cat_2012_tri_high:anticorrupt +
                                              
                                              cn_g_sqmi_cat_2012_tri:anticorrupt +
                                              ind_ug_sqmi_cat_2012_tri:anticorrupt +
                                             
                                              share_pop_county_female * anticorrupt +
                                              
                                              share_pop_5_17_county * anticorrupt +
                                              share_pop_18_24_county * anticorrupt + 
                                              share_pop_25_34_county * anticorrupt +
                                              share_pop_35_44_county * anticorrupt +
                                              share_pop_45_54_county * anticorrupt +
                                              share_pop_55_64_county * anticorrupt +
                                              share_pop_65_county * anticorrupt + 
                                              
                                              share_pop_white_county * anticorrupt + 
                                              share_pop_black_county * anticorrupt + 
                                              share_pop_hispanic_county * anticorrupt + 
                                              share_pop_asian_county * anticorrupt + 
                                              share_pop_cn_county * anticorrupt + 
                                              
                                              share_foreign_born_county * anticorrupt + 
                                              
                                              share_edu_ba_above_county * anticorrupt +
                                              share_enroll_county * anticorrupt + 
                                              ln_pop_density_county * anticorrupt + 
                                              
                                              effective_tax_county * anticorrupt + 
                                              
                                              ipw_county * anticorrupt + 
                                              
                                              employ_rate_county * anticorrupt + 
                                              median_household_income_county * anticorrupt + 
                                              vacancy_county * anticorrupt 
                                            | county_fips | 0 | county_fips, 
                                            keepX = TRUE,
                                            data = df.treat.1.exclude.outliers.12.16)

summary(did.12.16.tri.full.exclude.outliers)


## DiD analysis: Exclude Outlier Counties --------------------------------------
## period: 2012 vs. 2020

# identify outliers
outlier.vec.12.20 <- boxplot.stats(df.treat.2$china_undergraduate_county_sqmi_2012)$out
outlier.vec.12.20 <- outlier.vec.12.20[outlier.vec.12.20 > summary(df.treat.2$china_undergraduate_county_sqmi_2012)["3rd Qu."]]
outlier.vec.12.20

# subset to outliers
outlier.df.12.20 <- df.treat.2 %>%
  filter(china_undergraduate_county_sqmi_2012 %in% outlier.vec.12.20) %>%
  mutate(county_state_name = paste(county_name, state_name, sep = ", "))

# extract outliers
unique(outlier.df.12.20$county_state_name)
n_distinct(outlier.df.12.20$county_fips)

# exclude outliers
df.treat.2.exclude.outliers.12.20 <- df.treat.2 %>% 
  filter(!(county_fips %in% outlier.df.12.20$county_fips))

# dichotomous full model
did.12.20.bi.full.exclude.outliers <- felm(dem_share ~ 
                                             anticorrupt + 
                                             cn_ug_sqmi_cat_2012_bi_high:anticorrupt +
                                             
                                             cn_g_sqmi_cat_2012_bi:anticorrupt +
                                             ind_ug_sqmi_cat_2012_bi:anticorrupt +
                                             
                                             share_pop_county_female * anticorrupt +
                                             
                                             share_pop_5_17_county * anticorrupt +
                                             share_pop_18_24_county * anticorrupt + 
                                             share_pop_25_34_county * anticorrupt +
                                             share_pop_35_44_county * anticorrupt +
                                             share_pop_45_54_county * anticorrupt +
                                             share_pop_55_64_county * anticorrupt +
                                             share_pop_65_county * anticorrupt + 
                                             
                                             share_pop_white_county * anticorrupt + 
                                             share_pop_black_county * anticorrupt + 
                                             share_pop_hispanic_county * anticorrupt + 
                                             share_pop_asian_county * anticorrupt + 
                                             share_pop_cn_county * anticorrupt + 
                                             
                                             share_foreign_born_county * anticorrupt + 
                                             
                                             share_edu_ba_above_county * anticorrupt +
                                             share_enroll_county * anticorrupt + 
                                             ln_pop_density_county * anticorrupt + 
                                             
                                             effective_tax_county * anticorrupt + 
                                             
                                             ipw_county * anticorrupt + 
                                             
                                             employ_rate_county * anticorrupt + 
                                             median_household_income_county * anticorrupt + 
                                             vacancy_county * anticorrupt 
                                           | county_fips | 0 | county_fips, 
                                           keepX = TRUE,
                                           data = df.treat.2.exclude.outliers.12.20)

summary(did.12.20.bi.full.exclude.outliers)


# trichotomous full model
did.12.20.tri.full.exclude.outliers <- felm(dem_share ~ 
                                              anticorrupt + 
                                              cn_ug_sqmi_cat_2012_tri_medium:anticorrupt +
                                              cn_ug_sqmi_cat_2012_tri_high:anticorrupt +
                                              
                                              cn_g_sqmi_cat_2012_tri:anticorrupt +
                                              ind_ug_sqmi_cat_2012_tri:anticorrupt +
                                             
                                              share_pop_county_female * anticorrupt +
                                              
                                              share_pop_5_17_county * anticorrupt +
                                              share_pop_18_24_county * anticorrupt + 
                                              share_pop_25_34_county * anticorrupt +
                                              share_pop_35_44_county * anticorrupt +
                                              share_pop_45_54_county * anticorrupt +
                                              share_pop_55_64_county * anticorrupt +
                                              share_pop_65_county * anticorrupt + 
                                              
                                              share_pop_white_county * anticorrupt + 
                                              share_pop_black_county * anticorrupt + 
                                              share_pop_hispanic_county * anticorrupt + 
                                              share_pop_asian_county * anticorrupt + 
                                              share_pop_cn_county * anticorrupt + 
                                              
                                              share_foreign_born_county * anticorrupt + 
                                              
                                              share_edu_ba_above_county * anticorrupt +
                                              share_enroll_county * anticorrupt + 
                                              ln_pop_density_county * anticorrupt + 
                                              
                                              effective_tax_county * anticorrupt + 
                                              
                                              ipw_county * anticorrupt + 
                                              
                                              employ_rate_county * anticorrupt + 
                                              median_household_income_county * anticorrupt + 
                                              vacancy_county * anticorrupt 
                                            | county_fips | 0 | county_fips, 
                                            keepX = TRUE,
                                            data = df.treat.2.exclude.outliers.12.20)

summary(did.12.20.tri.full.exclude.outliers)


## Appendix Table D.3: DiD Regression Results: Excluding Outlier Counties ------
# set outputs
did.exclude.outliers <- list(did.12.16.bi.full.exclude.outliers,
                             did.12.16.tri.full.exclude.outliers,
                             did.12.20.bi.full.exclude.outliers,
                             did.12.20.tri.full.exclude.outliers)

# extract clustered SEs
did.exclude.outliers.cse <- list(did.12.16.bi.full.exclude.outliers$cse,
                                 did.12.16.tri.full.exclude.outliers$cse,
                                 did.12.20.bi.full.exclude.outliers$cse,
                                 did.12.20.tri.full.exclude.outliers$cse)

# set model names
mod.names.exclude.outliers <- c("2012 vs. 2016: Bi",
                                "2012 vs. 2016: Tri.",
                                "2012 vs. 2020: Bi",
                                "2012 vs. 2020: Tri.")

# set variable order
var.order.exclude.outliers <- c("^anticorrupt$",
                                "^anticorrupt:cn_ug_sqmi_cat_2012_bi_high$",
                                "^anticorrupt:cn_ug_sqmi_cat_2012_tri_medium$",
                                "^anticorrupt:cn_ug_sqmi_cat_2012_tri_high$",
                                
                                "^anticorrupt:cn_g_sqmi_cat_2012_biCN-G-high$",
                                "^anticorrupt:cn_g_sqmi_cat_2012_triCN-G-medium$",
                                "^anticorrupt:cn_g_sqmi_cat_2012_triCN-G-high$",
                                
                                "^anticorrupt:ind_ug_sqmi_cat_2012_biIND-UG-high$",
                                "^anticorrupt:ind_ug_sqmi_cat_2012_triIND-UG-medium$",
                                "^anticorrupt:ind_ug_sqmi_cat_2012_triIND-UG-high$",
                                
                                "^share_pop_county_female$",
                                "^share_pop_5_17_county$",
                                "^share_pop_18_24_county$",
                                "^share_pop_25_34_county$",
                                "^share_pop_35_44_county$",
                                "^share_pop_45_54_county$",
                                "^share_pop_55_64_county$",
                                "^share_pop_65_county$",
                                
                                "^share_pop_white_county$",
                                "^share_pop_black_county$",
                                "^share_pop_hispanic_county$",
                                "^share_pop_asian_county$",
                                "^share_pop_cn_county$",
                                
                                "^share_foreign_born_county$",
                                "^share_edu_ba_above_county$",
                                "^share_enroll_county$",
                                "^ln_pop_density_county$",
                                "^effective_tax_county$", 
                                "^ipw_county$",
                                "^employ_rate_county$", 
                                "^median_household_income_county$",
                                "^vacancy_county$",
                                
                                "^anticorrupt:share_pop_county_female$",
                                "^anticorrupt:share_pop_5_17_county$",
                                "^anticorrupt:share_pop_18_24_county$",
                                "^anticorrupt:share_pop_25_34_county$",
                                "^anticorrupt:share_pop_35_44_county$",
                                "^anticorrupt:share_pop_45_54_county$",
                                "^anticorrupt:share_pop_55_64_county$",
                                "^anticorrupt:share_pop_65_county$",
                                
                                "^anticorrupt:share_pop_white_county$",
                                "^anticorrupt:share_pop_black_county$",
                                "^anticorrupt:share_pop_hispanic_county$",
                                "^anticorrupt:share_pop_asian_county$",
                                "^anticorrupt:share_pop_cn_county$",
                                
                                "^anticorrupt:share_foreign_born_county$",
                                "^anticorrupt:share_edu_ba_above_county$",
                                "^anticorrupt:share_enroll_county$",
                                "^anticorrupt:ln_pop_density_county$",
                                "^anticorrupt:effective_tax_county$", 
                                "^anticorrupt:ipw_county$",
                                "^anticorrupt:employ_rate_county$", 
                                "^anticorrupt:median_household_income_county$",
                                "^anticorrupt:vacancy_county$")

# set var label
var.label.exclude.outliers <- str_replace_all(var.order.exclude.outliers, "\\^", "")
var.label.exclude.outliers <- str_replace_all(var.label.exclude.outliers, "\\$", "")

# save
#sink(file.path(MAIN_DIR, "Table-D3.tex"))
sink(file.path(MAIN_DIR, "Table-D3.txt"))
stargazer(did.exclude.outliers,
          se = did.exclude.outliers.cse,
          #type = "latex",
          type = "text",
          title = "DiD Regression Results: Excluding Outlier Counties",
          dep.var.labels = "Two-Party Democratic Vote Share",
          dep.var.labels.include = TRUE,
          column.labels = mod.names.exclude.outliers,
          order = var.order.exclude.outliers,
          covariate.labels = replaceVarName(var.vec = var.label.exclude.outliers,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: County",
                             rep("Yes", 4)),
                           c("Number of Counties",
                             formatC(n_distinct(did.12.16.bi.full.exclude.outliers$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.12.16.tri.full.exclude.outliers$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.12.20.bi.full.exclude.outliers$fe$county_fips), big.mark = ","),
                             formatC(n_distinct(did.12.20.tri.full.exclude.outliers$fe$county_fips), big.mark = ","))),
          label = "tb:did-pres-exclude-outliers",
          single.row = FALSE,
          df = FALSE,
          font.size = "tiny",
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          notes.align = "l",
          notes.append = FALSE)
sink()

